# Read the disaster-event data, skipping the title row. Use `<-` for
# assignment and quiet the column-spec message, as readr itself suggests.
Eventdf <- read_csv("EventCostBDD.csv", skip = 1, show_col_types = FALSE)
## Rows: 376 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Name, Disaster, Disaster Group, Begin Date, End Date, Central Day,...
## dbl (17): Yb, Mb, Db, Ye, Me, De, Total CPI-Adjusted Cost (Millions of Dolla...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm column names and parsed types.
head(Eventdf)
## # A tibble: 6 × 24
## Name Disaster `Disaster Group` `Begin Date` Yb Mb Db `End Date`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 Southern … Flooding SevStorm/Flood 4/10/1980 1980 4 10 4/17/1980
## 2 Hurricane… Tropica… Tropical Cyclone 8/7/1980 1980 8 7 8/11/1980
## 3 Central/E… Drought WF/Drought 6/1/1980 1980 6 1 11/30/1980
## 4 Florida F… Freeze Winter/Freeze 1/12/1981 1981 1 12 1/14/1981
## 5 Severe St… Severe … SevStorm/Flood 5/5/1981 1981 5 5 5/10/1981
## 6 Midwest/S… Winter … Winter/Freeze 1/8/1982 1982 1 8 1/16/1982
## # ℹ 16 more variables: Ye <dbl>, Me <dbl>, De <dbl>,
## # `Total CPI-Adjusted Cost (Millions of Dollars)` <dbl>, Deaths <dbl>,
## # `Durn (days)` <dbl>, `Durn (Weeks)` <dbl>, `Durn (Cal Mos)` <dbl>,
## # `Central Day` <chr>, `Day of W_yr` <dbl>, `MidPt Mo` <dbl>, W_Yr <dbl>,
## # W_Week <dbl>, W_Mo <dbl>, SeasNum <dbl>, Season <chr>
# Begin date, group, cost, and season columns for every event.
# The original piped through group_by(`Disaster Group`) before mutate(),
# but Date is computed row-wise, so the grouping did nothing — dropped.
disas <- Eventdf %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
# Month number over time, one colored line per disaster group.
ggplot() +
  geom_line(aes(x = Date, y = W_Mo, color = `Disaster Group`), data = disas) +
  labs(x = "Time", y = "Month Number", title = "Month Number for Different Disasters") +
  theme_classic()
# SevStorm/Flood events only. Filter first so later steps touch fewer rows;
# the original's group_by() before a row-wise mutate() did nothing — dropped.
disas1 <- Eventdf %>%
  filter(`Disaster Group` == "SevStorm/Flood") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo) %>%
  mutate(Date = mdy(`Begin Date`))
# Raw series plus three loess smooths (spans 0.2 / 0.6 / 1) to compare
# short-run versus long-run structure in the month numbers.
ggplot(aes(x = Date, y = W_Mo), data = disas1) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Month Number", title = "Month Numbers for SevStorm/Flood") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Month number over time again, but with lines additionally split by SeasNum
# so each season's run of events is drawn as its own segment.
disas %>%
  ggplot(aes(x = Date, y = W_Mo, color = `Disaster Group`, group = SeasNum)) +
  geom_line() +
  labs(x = "Time", y = "Month Number", title = "Month Number for Different Disasters Grouped by SeasNum and Disaster Group") +
  theme_classic()
# Winter/Freeze events only; filter first, and drop the original's
# group_by(), which had no effect before a row-wise mutate().
disas2 <- Eventdf %>%
  filter(`Disaster Group` == "Winter/Freeze") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo) %>%
  mutate(Date = mdy(`Begin Date`))
# Raw series plus three loess smooths at increasing spans.
ggplot(aes(x = Date, y = W_Mo), data = disas2) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Month Number", title = "Month Number for Winter/Freeze") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Rebuild disas (identical to the earlier definition) — kept for the
# document's flow, but without the no-op group_by() of the original.
disas <- Eventdf %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
# Season number over time, colored by disaster group.
ggplot() +
  geom_line(aes(x = Date, y = SeasNum, color = `Disaster Group`), data = disas) +
  labs(x = "Time", y = "Season Number", title = "Month Number for Different Disasters by Season Number") +
  theme_classic()
# SevStorm/Flood season numbers over time with three loess smooths.
# Fixes: no-op group_by() removed; y-axis label corrected from
# "Month Number" to "Season Number" (the plot maps y = SeasNum).
disas1 <- Eventdf %>%
  filter(`Disaster Group` == "SevStorm/Flood") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
ggplot(aes(x = Date, y = SeasNum), data = disas1) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Season Number", title = "Month Number SevStorm/Flood by Season Number") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Winter/Freeze season numbers over time with three loess smooths.
# Fixes: no-op group_by() removed; y-axis label corrected from
# "Month Number" to "Season Number" (the plot maps y = SeasNum).
disas1 <- Eventdf %>%
  filter(`Disaster Group` == "Winter/Freeze") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
ggplot(aes(x = Date, y = SeasNum), data = disas1) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Season Number", title = "Month Number Winter/Freeze by Season Number") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Tropical Cyclone season numbers over time with three loess smooths.
# Fixes: no-op group_by() removed; y-axis label corrected from
# "Month Number" to "Season Number" (the plot maps y = SeasNum).
disas1 <- Eventdf %>%
  filter(`Disaster Group` == "Tropical Cyclone") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
ggplot(aes(x = Date, y = SeasNum), data = disas1) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Season Number", title = "Month Number Tropical Cyclone by Season Number") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# WF/Drought season numbers over time with three loess smooths.
# Fixes: no-op group_by() removed; y-axis label corrected from
# "Month Number" to "Season Number" (the plot maps y = SeasNum).
disas1 <- Eventdf %>%
  filter(`Disaster Group` == "WF/Drought") %>%
  select(`Begin Date`, `Disaster Group`, Disaster, Season,
         `Total CPI-Adjusted Cost (Millions of Dollars)`, W_Mo, SeasNum) %>%
  mutate(Date = mdy(`Begin Date`))
ggplot(aes(x = Date, y = SeasNum), data = disas1) +
  geom_line() +
  geom_smooth(se = FALSE, span = 0.2, color = "red") +
  geom_smooth(se = FALSE, span = 1, color = "blue") +
  geom_smooth(se = FALSE, span = 0.6, color = "green") +
  labs(x = "Time", y = "Season Number", title = "Month Number WF/Drought by Season Number") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Annual WF/Drought event counts: one row per event carrying its year's
# total. Uses add_count() instead of the multi-row summarise() the
# original used, which dplyr 1.1.0 deprecated in favor of reframe().
disas_WF <- Eventdf %>%
  filter(`Disaster Group` == "WF/Drought") %>%
  mutate(Date = mdy(`Begin Date`)) %>%
  add_count(Ye, name = "number") %>%
  select(Ye, Date, number) %>%
  drop_na()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Ye'. You can override using the `.groups`
## argument.
# Annual Winter/Freeze event counts, one row per event. add_count()
# replaces the multi-row summarise() deprecated in dplyr 1.1.0.
disas_Winter <- Eventdf %>%
  filter(`Disaster Group` == "Winter/Freeze") %>%
  mutate(Date = mdy(`Begin Date`)) %>%
  add_count(Ye, name = "number") %>%
  select(Ye, Date, number) %>%
  drop_na()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Ye'. You can override using the `.groups`
## argument.
# Annual Tropical Cyclone event counts, one row per event. add_count()
# replaces the multi-row summarise() deprecated in dplyr 1.1.0.
disas_Tropical <- Eventdf %>%
  filter(`Disaster Group` == "Tropical Cyclone") %>%
  mutate(Date = mdy(`Begin Date`)) %>%
  add_count(Ye, name = "number") %>%
  select(Ye, Date, number) %>%
  drop_na()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Ye'. You can override using the `.groups`
## argument.
# Annual SevStorm/Flood event counts, one row per event. add_count()
# replaces the multi-row summarise() deprecated in dplyr 1.1.0.
disas_Flood <- Eventdf %>%
  filter(`Disaster Group` == "SevStorm/Flood") %>%
  mutate(Date = mdy(`Begin Date`)) %>%
  add_count(Ye, name = "number") %>%
  select(Ye, Date, number) %>%
  drop_na()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Ye'. You can override using the `.groups`
## argument.
# One line per disaster group, with a manual color legend.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
colors <- c("WF/Drought" = "orange", "Winter/Freeze" = "slategray2",
            "Tropical Cyclone" = "lightseagreen", "SevStorm/Flood" = "blue")
ggplot() +
  geom_line(aes(x = Date, y = number, color = "WF/Drought"), linewidth = 1, data = disas_WF) +
  geom_line(aes(x = Date, y = number, color = "Winter/Freeze"), linewidth = 1, data = disas_Winter) +
  geom_line(aes(x = Date, y = number, color = "Tropical Cyclone"), linewidth = 1, data = disas_Tropical) +
  geom_line(aes(x = Date, y = number, color = "SevStorm/Flood"), linewidth = 1, data = disas_Flood) +
  labs(x = "Time", y = "Number of Disasters Per Annum",
       title = "The Number of Disasters Per Annum By Disaster Group", color = "Legend") +
  theme_bw() +
  scale_color_manual(values = colors)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
There do appear to be slight changes in climate, especially an increase in flooding and severe storms. There has also been a slight increase in tropical cyclones over the past few years; however, Winter/Freeze and Drought have stayed roughly the same.
When looking at climate change, natural disasters are an area where people want to see information. Looking at the plot titled The Number of Disasters Per Annum By Disaster Group, a couple of trends can be seen. First, the number of disasters such as tropical cyclones and severe storms/flooding has increased tremendously over the past few years, which is very different from their numbers back in the 1980s and 1990s. When looking at the plot labeled Month Numbers for Different Disasters, it can be seen that there is seasonality among a couple of the disaster groups. Specifically, WF/Drought, Winter/Freeze, and Tropical Cyclone events all stay within one season or just a couple of months. WF/Drought normally happens between June and August, Winter/Freeze normally happens between January and March, and tropical cyclones can happen anywhere between July and November. All of these show seasonality, and paired with the plot titled The Number of Disasters Per Annum By Disaster Group, we could come to the conclusion that there is some evidence of climate change as a result of disaster types and seasonality. However, seasonality plays a very small role, as the disaster group that went through the biggest change is the only disaster type that did not show seasonality: severe storms, which were shown to be a year-round occurrence.
\(\mu_t = \beta_0 + \beta_1 \text{MonthFebruary} + \beta_2 \text{MonthMarch} + \dots + \beta_{11} \text{MonthDecember}\)
# WF/Drought begin-month and cost, complete cases only; printed to show
# the rows that will feed the seasonal-means model.
season_wf <- Eventdf %>%
  filter(`Disaster Group` == "WF/Drought") %>%
  dplyr::select(Mb, `Disaster Group`, `Total CPI-Adjusted Cost (Millions of Dollars)`) %>%
  drop_na()
season_wf
## # A tibble: 53 × 3
## Mb `Disaster Group` `Total CPI-Adjusted Cost (Millions of Dollars)`
## <dbl> <chr> <dbl>
## 1 6 WF/Drought 39579
## 2 6 WF/Drought 9307.
## 3 6 WF/Drought 5050.
## 4 6 WF/Drought 53010.
## 5 6 WF/Drought 7628.
## 6 6 WF/Drought 1666.
## 7 3 WF/Drought 6859.
## 8 10 WF/Drought 7359
## 9 6 WF/Drought 2705.
## 10 9 WF/Drought 2908.
## # ℹ 43 more rows
# Monthly ts of WF/Drought costs, anchored at the first event (June 1980).
# NOTE(review): the events are not actually one-per-month, but ts() assumes
# regular monthly sampling — confirm this indexing is intended.
season_wf.ts <- ts(season_wf$`Total CPI-Adjusted Cost (Millions of Dollars)`, start = c(1980, 6), frequency = 12)
# season() labels each observation with its calendar month (a factor).
month <- season(season_wf.ts)
# Seasonal-means model: one mean cost per calendar month, January as baseline.
seasonal_means <- lm(season_wf.ts ~month)
summary(seasonal_means)
##
## Call:
## lm(formula = season_wf.ts ~ month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12815 -6028 -2808 2235 38271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11899 5722 2.080 0.0439 *
## monthFebruary -7676 8092 -0.949 0.3484
## monthMarch 874 8092 0.108 0.9145
## monthApril -880 8092 -0.109 0.9139
## monthMay -6606 8092 -0.816 0.4190
## monthJune 290 7676 0.038 0.9700
## monthJuly -5804 7676 -0.756 0.4539
## monthAugust -4143 7676 -0.540 0.5923
## monthSeptember 2840 7676 0.370 0.7133
## monthOctober -4734 7676 -0.617 0.5409
## monthNovember -4028 8092 -0.498 0.6213
## monthDecember -1204 8092 -0.149 0.8824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11440 on 41 degrees of freedom
## Multiple R-squared: 0.09231, Adjusted R-squared: -0.1512
## F-statistic: 0.3791 on 11 and 41 DF, p-value: 0.957
# Overlay the fitted monthly means as short red segments at each time point.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
fitted_model <- data.frame(date=time(season_wf.ts), fit=fitted(seasonal_means))
autoplot(season_wf.ts) +
  labs(y="Cost in Millions") +
  geom_segment(aes(x=date-0.05, xend=date+0.05, y=fit, yend=fit), linewidth=2, color="red", data=fitted_model)
\(\mu_t = \beta_0 +\beta_1 \text{MonthFebruary} + \beta_2 \text{MonthMarch} + ... + \beta_{11} \text{MonthDecember}\)
# Winter/Freeze costs as a monthly ts and a seasonal-means fit, mirroring
# the WF/Drought block. Uses dplyr::select() for consistency with that
# block (guards against select() being masked by another attached package).
season_Winter <- Eventdf %>%
  filter(`Disaster Group` == "Winter/Freeze") %>%
  dplyr::select(Mb, `Disaster Group`, `Total CPI-Adjusted Cost (Millions of Dollars)`) %>%
  drop_na()
season_winter.ts <- ts(season_Winter$`Total CPI-Adjusted Cost (Millions of Dollars)`, start = c(1981, 1), frequency = 12)
month <- season(season_winter.ts)
seasonal_means <- lm(season_winter.ts ~ month)
summary(seasonal_means)
##
## Call:
## lm(formula = season_winter.ts ~ month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8912.1 -1824.2 58.8 1172.7 15981.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3958.5 2981.9 1.328 0.200
## monthFebruary -1931.3 4217.0 -0.458 0.652
## monthMarch 155.4 4217.0 0.037 0.971
## monthApril -1603.5 4217.0 -0.380 0.708
## monthMay 6603.0 4217.0 1.566 0.134
## monthJune 1062.7 4217.0 0.252 0.804
## monthJuly -2102.9 4217.0 -0.499 0.624
## monthAugust 1692.0 4714.8 0.359 0.724
## monthSeptember -696.8 4714.8 -0.148 0.884
## monthOctober 3230.1 4714.8 0.685 0.502
## monthNovember -2237.2 4714.8 -0.475 0.641
## monthDecember 597.0 4714.8 0.127 0.901
##
## Residual standard error: 5165 on 19 degrees of freedom
## Multiple R-squared: 0.2832, Adjusted R-squared: -0.1317
## F-statistic: 0.6826 on 11 and 19 DF, p-value: 0.7386
# Overlay the fitted monthly means as short red segments at each time point.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
fitted_model <- data.frame(date=time(season_winter.ts), fit=fitted(seasonal_means))
autoplot(season_winter.ts) +
  labs(y="Cost in Millions") +
  geom_segment(aes(x=date-0.05, xend=date+0.05, y=fit, yend=fit), linewidth=2, color="red", data=fitted_model)
\(\mu_t = \beta_0 +\beta_1 \text{MonthFebruary} + \beta_2 \text{MonthMarch} + ... + \beta_{11} \text{MonthDecember}\)
# Tropical Cyclone costs as a monthly ts and a seasonal-means fit.
# dplyr::select() for consistency with the WF/Drought block.
season_tropic <- Eventdf %>%
  filter(`Disaster Group` == "Tropical Cyclone") %>%
  dplyr::select(Mb, `Disaster Group`, `Total CPI-Adjusted Cost (Millions of Dollars)`) %>%
  drop_na()
season_tropic.ts <- ts(season_tropic$`Total CPI-Adjusted Cost (Millions of Dollars)`, start = c(1980, 8), frequency = 12)
month <- season(season_tropic.ts)
seasonal_means <- lm(season_tropic.ts ~ month)
summary(seasonal_means)
##
## Call:
## lm(formula = season_tropic.ts ~ month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51244 -19025 -5986 4226 142080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30829 17815 1.731 0.0897 .
## monthFebruary -12031 25194 -0.478 0.6351
## monthMarch -4325 25194 -0.172 0.8644
## monthApril -16388 25194 -0.650 0.5184
## monthMay -18971 25194 -0.753 0.4550
## monthJune -1416 25194 -0.056 0.9554
## monthJuly -23292 25194 -0.925 0.3597
## monthAugust -21226 24122 -0.880 0.3831
## monthSeptember -5771 24122 -0.239 0.8119
## monthOctober -22494 25194 -0.893 0.3762
## monthNovember 2731 25194 0.108 0.9141
## monthDecember 22138 25194 0.879 0.3838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39840 on 50 degrees of freedom
## Multiple R-squared: 0.1123, Adjusted R-squared: -0.08304
## F-statistic: 0.5748 on 11 and 50 DF, p-value: 0.8399
# Overlay the fitted monthly means as short red segments at each time point.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
fitted_model <- data.frame(date=time(season_tropic.ts), fit=fitted(seasonal_means))
autoplot(season_tropic.ts) +
  labs(y="Cost in Millions") +
  geom_segment(aes(x=date-0.05, xend=date+0.05, y=fit, yend=fit), linewidth=2, color="red", data=fitted_model)
\(\mu_t = \beta_0 +\beta_1 \text{MonthFebruary} + \beta_2 \text{MonthMarch} + ... + \beta_{11} \text{MonthDecember}\)
# SevStorm/Flood costs as a monthly ts and a seasonal-means fit.
# dplyr::select() for consistency with the WF/Drought block.
season_flood <- Eventdf %>%
  filter(`Disaster Group` == "SevStorm/Flood") %>%
  dplyr::select(Mb, `Disaster Group`, `Total CPI-Adjusted Cost (Millions of Dollars)`) %>%
  drop_na()
season_flood.ts <- ts(season_flood$`Total CPI-Adjusted Cost (Millions of Dollars)`, start = c(1980, 4), frequency = 12)
month <- season(season_flood.ts)
seasonal_means <- lm(season_flood.ts ~ month)
summary(seasonal_means)
##
## Call:
## lm(formula = season_flood.ts ~ month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3644 -1274 -759 169 40212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1979.6 823.3 2.404 0.0170 *
## monthFebruary 2874.2 1164.3 2.469 0.0143 *
## monthMarch 928.3 1164.3 0.797 0.4261
## monthApril 335.7 1149.7 0.292 0.7705
## monthMay 987.3 1149.7 0.859 0.3914
## monthJune 437.3 1164.3 0.376 0.7076
## monthJuly 1107.6 1164.3 0.951 0.3425
## monthAugust 925.0 1164.3 0.794 0.4278
## monthSeptember 1052.6 1164.3 0.904 0.3670
## monthOctober 947.3 1164.3 0.814 0.4168
## monthNovember 442.1 1164.3 0.380 0.7045
## monthDecember 200.2 1164.3 0.172 0.8636
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3589 on 218 degrees of freedom
## Multiple R-squared: 0.03906, Adjusted R-squared: -0.009432
## F-statistic: 0.8055 on 11 and 218 DF, p-value: 0.6346
# Overlay the fitted monthly means as short red segments at each time point.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
fitted_model <- data.frame(date=time(season_flood.ts), fit=fitted(seasonal_means))
autoplot(season_flood.ts) +
  labs(y="Cost in Millions") +
  geom_segment(aes(x=date-0.05, xend=date+0.05, y=fit, yend=fit), linewidth=2, color="red", data=fitted_model)
For the plots above, there are four different seasonal-means models, split up by disaster group. The first model represents the TS with only WF/Drought as the disaster type; looking at the visualization of this TS, it can be seen that this model does not fit the data very well — many of the points are not close to the fitted means shown below. The second model depicts the data when Winter/Freeze is the disaster type. This plot does a better job of representing the data; however, there are three spikes that are not close to any of the fitted means. The third plot depicts the model where Tropical Cyclone is the disaster type. This model again fits the data fairly well, except where the series gets very high. The last model is the one where SevStorm/Flood is the disaster type. This model is fit best by the lm model, which makes sense, as this disaster type does not have a seasonal relationship quite like the others.
\(\mu_t = \beta_0 + \beta_1 \cos(2\pi f t) + \beta_2 \sin(2\pi f t)\)
# All disaster types together as one monthly ts, fit with a first-harmonic
# cosine trend: mu_t = b0 + b1*cos(2*pi*t) + b2*sin(2*pi*t).
# dplyr::select() for consistency with the other season_* blocks.
season_cos <- Eventdf %>%
  dplyr::select(Mb, `Disaster Group`, `Total CPI-Adjusted Cost (Millions of Dollars)`) %>%
  drop_na()
season_cos.ts <- ts(season_cos$`Total CPI-Adjusted Cost (Millions of Dollars)`, start = c(1980, 4), frequency = 12)
t <- time(season_cos.ts)
# Compute the harmonic predictors at each observation time.
X1 <- cos(2*pi*t)
X2 <- sin(2*pi*t)
# Fit the model
cosine_model <- lm(season_cos.ts ~ X1 + X2)
summary(cosine_model)
##
## Call:
## lm(formula = season_cos.ts ~ X1 + X2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8255 -5799 -3648 -1642 186078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7056.8 912.6 7.733 9.88e-14 ***
## X1 -263.6 1290.5 -0.204 0.8383
## X2 2359.7 1290.5 1.828 0.0683 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17690 on 373 degrees of freedom
## Multiple R-squared: 0.009003, Adjusted R-squared: 0.003689
## F-statistic: 1.694 on 2 and 373 DF, p-value: 0.1851
# Evaluate the fitted harmonic on a monthly grid over 1980-2012 and
# overlay it (in red) on the observed series.
a <- seq(1980, 2012, by = 1/12)[-1]
coefs <- coef(cosine_model)
fitted_cosine <- coefs[1] + coefs[2] * cos(2*pi*a) + coefs[3] * sin(2*pi*a)
cosine_fit <- data.frame(x = a, y = fitted_cosine)
autoplot(season_cos.ts) +
  labs(y = "Total CPI Adjusted Cost (in Millions)") +
  geom_line(aes(x = x, y = y), color = "red", data = cosine_fit)
The sinusoidal model depicted on the graph above does an okay job of representing the data. This TS includes all of the disaster types and is not split by group. The sinusoidal model looks very good for the data where the disasters are more typical; however, if a year has a high value for total cost, the model becomes a very poor representation of the data. Looking specifically at the year 2003, the disaster relief cost is around $150,000 million, yet the trend line fit by the sinusoidal model is barely above zero. This model struggles compared to the seasonal-means models above because its fitted curve is constrained to a single fixed amplitude.
# Exposure data: population and housing units per ACI region and season
# (read with readxl); printed to show the columns.
PopHo <- read_excel("ExposureS_PopHo.xlsx")
PopHo
## # A tibble: 2,250 × 7
## `ACI Region` `ACI Region Name` Subregions `W_Yr-S` MidSeason Popn
## <chr> <chr> <chr> <chr> <dttm> <dbl>
## 1 ALA Alaska AK 1961-1 1961-01-15 00:00:00 229000
## 2 ALA Alaska AK 1961-2 1961-04-15 00:00:00 227000
## 3 ALA Alaska AK 1961-3 1961-07-15 00:00:00 240000
## 4 ALA Alaska AK 1961-4 1961-10-15 00:00:00 238000
## 5 ALA Alaska AK 1962-1 1962-01-15 00:00:00 236000
## 6 ALA Alaska AK 1962-2 1962-04-15 00:00:00 235000
## 7 ALA Alaska AK 1962-3 1962-07-15 00:00:00 248000
## 8 ALA Alaska AK 1962-4 1962-10-15 00:00:00 246000
## 9 ALA Alaska AK 1963-1 1963-01-15 00:00:00 244000
## 10 ALA Alaska AK 1963-2 1963-04-15 00:00:00 242000
## # ℹ 2,240 more rows
## # ℹ 1 more variable: HUnits <dbl>
# Great Plains (GPL) region: housing units per person, quarterly from 1961 Q1.
GPL_data <- PopHo %>%
  filter(`ACI Region` == "GPL") %>%
  mutate(UnitsPerPerson = HUnits / Popn) %>%
  select(`W_Yr-S`, Popn, HUnits, UnitsPerPerson) %>%
  drop_na()
GPL_ts <- ts(GPL_data$UnitsPerPerson, start = c(1961, 1), frequency = 4)
# Exploratory views: series, ACF/PACF, season plot, normal QQ, STL decomposition.
autoplot(GPL_ts) + labs(y = "Housing Units Per Person")
ggAcf(GPL_ts)
ggPacf(GPL_ts)
ggseasonplot(GPL_ts)
qqnorm(GPL_ts)
qqline(GPL_ts, col = "red")
autoplot(stl(GPL_ts, s.window = "periodic"))
Starting with the TS plot above, there are a couple of trends in this data. The first apparent trend is that this TS does not have a constant mean; this can be seen from the drastic increase in mean as time increases, which means this TS is not stationary. Looking at the ggAcf and ggPacf, we see no signs of either white noise or a random walk. Looking specifically at the ggPacf, we see a graph that looks more like a moving-average process. The ggseasonplot shows definite signs of seasonality, especially as the data gains more time. In the decomposition plot above, the remainder appears relatively normal until around 2020, when there is a major increase. Checking assumptions with the normal QQ plot, the plot fails the fat-pencil test, so the data are not normal.
# Train/test split: train through 2021 Q4, hold out 2022 Q1-Q2 as the test set.
GPL_train <- window(GPL_ts, end = c(2021, 4))
GPL_test <- window(GPL_ts, start = c(2022, 1), end = c(2022, 2))
GPL_test
## Qtr1 Qtr2
## 2022 0.4570064 0.4563333
In the code above we hold out the first two quarters of 2022 as the test set.
## Creating different Models
GPL_exp <- HoltWinters(GPL_train, beta=FALSE, gamma=FALSE) # Simple Exponential Smoothing (no trend, no season)
GPL_trend <- HoltWinters(GPL_train, gamma=FALSE) # Holt model (trend, no season)
GPL_season <- HoltWinters(GPL_train, beta=FALSE) # season only: trend coefficient forced to zero
GPL_hw <- HoltWinters(GPL_train) # Holt-Winters (trend & season)
## Forecasting each model over the test horizon (two quarters).
## Fix: the original used a hard-coded h=2 for GPL.exp but
## h=length(GPL_test) for the others — use the same expression for all.
GPL.exp <- forecast(GPL_exp, h=length(GPL_test))
GPL.trend <- forecast(GPL_trend, h=length(GPL_test))
GPL.season <- forecast(GPL_season, h=length(GPL_test))
GPL.hw <- forecast(GPL_hw, h=length(GPL_test))
## PI=FALSE says NOT to plot Prediction intervals
## Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
autoplot(window(GPL_train, start=c(1961,1))) +
  autolayer(GPL.exp, PI=FALSE, series="Exp Smoothing") +
  autolayer(GPL.trend, PI=FALSE, series="HW Trend Only") +
  autolayer(GPL.hw, PI=FALSE, series="Holt Winters") +
  autolayer(GPL.season, PI=FALSE, series="HW Season Only") +
  autolayer(GPL_test, series="True Housing Per Person", linewidth=0.5) +
  labs(x="Year", y="Great Plain Lakes, Housing Units Per Person") +
  theme_bw()
Looking at the plot above, all but one of the models appear to predict the next quarters' housing-units-per-person values very well. The one model that struggles is the HW trend-only model, whose forecast starts well below the rest.
# Season-only forecast against the true test values.
# Fix: `size` for lines was deprecated in ggplot2 3.4.0 — use `linewidth`.
autoplot(window(GPL_train, start=c(1961,1))) +
  autolayer(GPL.season, series="HW Season Only") +
  autolayer(GPL_test, series="True Housing Per Person", linewidth=0.5) +
  labs(x="Year", y="Great Plain Lakes, Housing Units Per Person") +
  theme_bw()
Looking at the Season Only forecast versus the true housing-units-per-person values, the forecast and the true series track each other closely; however, looking at the darker lines on the inside, the forecast appears to very slightly miss the starting points.
# Training- and test-set accuracy for simple exponential smoothing.
accuracy(GPL.exp, GPL_test)
## ME RMSE MAE MPE MAPE
## Training set 0.0008235133 0.0027276475 0.0015906402 0.2075424 0.3931337
## Test set -0.0006795475 0.0007583207 0.0006795475 -0.1488594 0.1488594
## MASE ACF1 Theil's U
## Training set 0.6118987 -0.05141148 NA
## Test set 0.2614131 -0.50000000 1.509581
# Training- and test-set accuracy for the Holt (trend-only) model.
accuracy(GPL.trend, GPL_test)
## ME RMSE MAE MPE MAPE
## Training set 5.949962e-07 0.002406355 0.001705079 0.00430616 0.4210345
## Test set 3.186708e-03 0.003187287 0.003186708 0.69780504 0.6978050
## MASE ACF1 Theil's U
## Training set 0.6559217 0.06448426 NA
## Test set 1.2258853 -0.50000000 4.644161
# Training- and test-set accuracy for the season-only model.
accuracy(GPL.season, GPL_test)
## ME RMSE MAE MPE MAPE
## Training set 0.000653508 0.001937809 0.001252501 0.1658503 0.3019220
## Test set -0.002014660 0.002033476 0.002014660 -0.4412082 0.4412082
## MASE ACF1 Theil's U
## Training set 0.4818208 0.08713236 NA
## Test set 0.7750137 -0.50000000 3.403139
# Training- and test-set accuracy for the full Holt-Winters model.
accuracy(GPL.hw, GPL_test)
## ME RMSE MAE MPE MAPE
## Training set -4.605059e-05 0.0017164144 0.0008790562 -0.007680859 0.2066577
## Test set 7.015981e-04 0.0008515571 0.0007015981 0.153555769 0.1535558
## MASE ACF1 Theil's U
## Training set 0.3381615 0.2438169 NA
## Test set 0.2698957 -0.5000000 0.3253477
# Collect the test-set row (row 2) RMSE and MAE (columns 2:3) of each
# model's accuracy() output into one comparison table.
tab <- rbind(accuracy(GPL.exp, GPL_test)[2,2:3],
accuracy(GPL.trend, GPL_test)[2,2:3],
accuracy(GPL.season, GPL_test)[2,2:3],
accuracy(GPL.hw, GPL_test)[2,2:3] )
rownames(tab) <- c("Exp Smoothing", "Holt (trend)", "HW (Season only)", "Holt Winters")
kable(tab)
| RMSE | MAE | |
|---|---|---|
| Exp Smoothing | 0.0007583 | 0.0006795 |
| Holt (trend) | 0.0031873 | 0.0031867 |
| HW (Season only) | 0.0020335 | 0.0020147 |
| Holt Winters | 0.0008516 | 0.0007016 |
Looking at the table output above, every model has very good accuracy for predicting the data; ultimately, the Simple Exponential Smoothing model has the lowest test RMSE (0.0007583, just below Holt-Winters at 0.0008516), so it is the best model for predicting the data.
# Refit the winning model on the FULL series and forecast two quarters ahead.
# NOTE: beta=FALSE, gamma=FALSE makes this SIMPLE exponential smoothing
# (no trend, no season) — the original comment mislabeled it as Holt-Winters.
GPL_exp.full <- HoltWinters(GPL_ts, beta=FALSE, gamma=FALSE)
GPL_exp.forecast <- forecast(GPL_exp.full, h=2)
autoplot(window(GPL_ts, start=c(1961,1))) +
  autolayer(GPL_exp.forecast) +
  labs(x="Year", y="Great Plain Lakes, Housing Units Per Person") +
  theme_bw()
GPL_exp.forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2022 Q3 0.4566276 0.4532982 0.4599571 0.4515357 0.4617196
## 2022 Q4 0.4566276 0.4526896 0.4605656 0.4506050 0.4626503
This model performs extremely well, as it has a very good accuracy rate. Looking at the plot, although it is small and hard to see, the forecast line looks to accurately predict where I would expect the housing-units-per-person number to go.
When reflecting on the in-class portion of the exam, I felt that it wasn't incredibly difficult; there were a couple of questions I wasn't expecting to see, but nothing that I felt unfairly represented the course material we have learned. The hardest part of the exam was time management: I found myself with two problems left when there were 8 minutes remaining. I was able to finish, but it was not my best work, as I was rushing. The take-home portion of the exam was a lot more difficult than I was expecting. The data wrangling required for the first few problems was really difficult to work with, and understanding what was being asked of the data also felt very difficult. The take-home exam also took me a little over 8 hours to complete, which is not something I was expecting. Going into our first exam, I thought this part would be easier, and as I finish the take-home part now, I feel I was completely wrong. I felt that I understood all the code we went over in class; however, when it came to setting up the data to be analyzed, I struggled.